Feature Effect Evaluation¶
PDP¶
Error of Model-PD compared to groundtruth-PD¶
In [4]:
# Look up where the feature-effect results database lives.
# NOTE(review): `config` is defined outside this cell; this looks like a
# configparser-style (section, option) lookup — confirm.
effects_results_storage = config.get("storage", "effects_results")

# Read the whole "pdp_results" table into a DataFrame. The configured location
# is appended directly after "..", so it presumably starts with a path
# separator and is resolved relative to the parent directory — verify.
df = pd.read_sql_table(
    table_name="pdp_results",
    con=f"sqlite:///..{effects_results_storage}",
)

# Bare last expression: render the frame as the cell output.
df
Out[4]:
| | index | model_id | model | simulation | n_train | noise_sd | metric | x_1 | x_2 | x_3 | x_4 | x_5 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | RandomForestRegressor_20240413_1_1000_0.1 | RandomForestRegressor | 1 | 1000 | 0.1 | mean_squared_error | 0.178899 | 0.203574 | 0.672312 | 0.110749 | 0.088638 |
| 1 | 0 | XGBRegressor_20240413_1_1000_0.1 | XGBRegressor | 1 | 1000 | 0.1 | mean_squared_error | 0.016875 | 0.015341 | 0.020013 | 0.017393 | 0.006096 |
| 2 | 0 | DecisionTreeRegressor_20240413_1_1000_0.1 | DecisionTreeRegressor | 1 | 1000 | 0.1 | mean_squared_error | 0.187063 | 0.168292 | 0.797859 | 0.159578 | 0.091377 |
| 3 | 0 | SVR_20240413_1_1000_0.1 | SVR | 1 | 1000 | 0.1 | mean_squared_error | 0.000020 | 0.000053 | 0.000271 | 0.000044 | 0.000185 |
| 4 | 0 | ElasticNet_20240413_1_1000_0.1 | ElasticNet | 1 | 1000 | 0.1 | mean_squared_error | 0.851548 | 0.953455 | 2.296132 | 0.000909 | 0.001021 |
| 5 | 0 | GAM_20240413_1_1000_0.1 | GAM | 1 | 1000 | 0.1 | mean_squared_error | 0.000283 | 0.000279 | 0.000105 | 0.000311 | 0.000201 |
| 6 | 0 | RandomForestRegressor_20240413_1_1000_0.5 | RandomForestRegressor | 1 | 1000 | 0.5 | mean_squared_error | 0.186385 | 0.211262 | 0.643921 | 0.101243 | 0.095049 |
| 7 | 0 | XGBRegressor_20240413_1_1000_0.5 | XGBRegressor | 1 | 1000 | 0.5 | mean_squared_error | 0.021464 | 0.024066 | 0.027774 | 0.021291 | 0.013897 |
| 8 | 0 | DecisionTreeRegressor_20240413_1_1000_0.5 | DecisionTreeRegressor | 1 | 1000 | 0.5 | mean_squared_error | 0.137864 | 0.300960 | 0.656510 | 0.134170 | 0.128621 |
| 9 | 0 | SVR_20240413_1_1000_0.5 | SVR | 1 | 1000 | 0.5 | mean_squared_error | 0.000672 | 0.000998 | 0.001899 | 0.000696 | 0.001844 |
| 10 | 0 | ElasticNet_20240413_1_1000_0.5 | ElasticNet | 1 | 1000 | 0.5 | mean_squared_error | 0.851724 | 0.953386 | 2.298243 | 0.001387 | 0.001388 |
| 11 | 0 | GAM_20240413_1_1000_0.5 | GAM | 1 | 1000 | 0.5 | mean_squared_error | 0.001042 | 0.001591 | 0.002211 | 0.000689 | 0.000812 |
| 12 | 0 | RandomForestRegressor_20240413_2_1000_0.1 | RandomForestRegressor | 2 | 1000 | 0.1 | mean_squared_error | 0.148672 | 0.209911 | 0.646148 | 0.246621 | 0.108333 |
| 13 | 0 | XGBRegressor_20240413_2_1000_0.1 | XGBRegressor | 2 | 1000 | 0.1 | mean_squared_error | 0.015571 | 0.020568 | 0.023027 | 0.025449 | 0.011853 |
| 14 | 0 | DecisionTreeRegressor_20240413_2_1000_0.1 | DecisionTreeRegressor | 2 | 1000 | 0.1 | mean_squared_error | 0.099713 | 0.244102 | 0.755546 | 0.243398 | 0.139652 |
| 15 | 0 | SVR_20240413_2_1000_0.1 | SVR | 2 | 1000 | 0.1 | mean_squared_error | 0.000022 | 0.000057 | 0.000127 | 0.000123 | 0.000039 |
| 16 | 0 | ElasticNet_20240413_2_1000_0.1 | ElasticNet | 2 | 1000 | 0.1 | mean_squared_error | 1.033813 | 0.909835 | 2.302066 | 0.000719 | 0.000694 |
| 17 | 0 | GAM_20240413_2_1000_0.1 | GAM | 2 | 1000 | 0.1 | mean_squared_error | 0.000525 | 0.000353 | 0.000170 | 0.000855 | 0.000140 |
| 18 | 0 | RandomForestRegressor_20240413_2_1000_0.5 | RandomForestRegressor | 2 | 1000 | 0.5 | mean_squared_error | 0.151655 | 0.214597 | 0.626806 | 0.253600 | 0.113697 |
| 19 | 0 | XGBRegressor_20240413_2_1000_0.5 | XGBRegressor | 2 | 1000 | 0.5 | mean_squared_error | 0.015402 | 0.024618 | 0.032997 | 0.031710 | 0.017810 |
| 20 | 0 | DecisionTreeRegressor_20240413_2_1000_0.5 | DecisionTreeRegressor | 2 | 1000 | 0.5 | mean_squared_error | 0.155236 | 0.189773 | 0.943644 | 0.249308 | 0.165510 |
| 21 | 0 | SVR_20240413_2_1000_0.5 | SVR | 2 | 1000 | 0.5 | mean_squared_error | 0.002258 | 0.002048 | 0.001367 | 0.002167 | 0.001201 |
| 22 | 0 | ElasticNet_20240413_2_1000_0.5 | ElasticNet | 2 | 1000 | 0.5 | mean_squared_error | 1.032929 | 0.909313 | 2.303330 | 0.003641 | 0.000981 |
| 23 | 0 | GAM_20240413_2_1000_0.5 | GAM | 2 | 1000 | 0.5 | mean_squared_error | 0.001076 | 0.001281 | 0.002035 | 0.003880 | 0.000032 |
| 24 | 0 | RandomForestRegressor_20240413_3_1000_0.1 | RandomForestRegressor | 3 | 1000 | 0.1 | mean_squared_error | 0.138445 | 0.190675 | 0.881256 | 0.168130 | 0.129340 |
| 25 | 0 | XGBRegressor_20240413_3_1000_0.1 | XGBRegressor | 3 | 1000 | 0.1 | mean_squared_error | 0.013733 | 0.011819 | 0.021524 | 0.015494 | 0.009011 |
| 26 | 0 | DecisionTreeRegressor_20240413_3_1000_0.1 | DecisionTreeRegressor | 3 | 1000 | 0.1 | mean_squared_error | 0.208063 | 0.392589 | 0.704313 | 0.260510 | 0.124586 |
| 27 | 0 | SVR_20240413_3_1000_0.1 | SVR | 3 | 1000 | 0.1 | mean_squared_error | 0.000197 | 0.000151 | 0.000109 | 0.000019 | 0.000020 |
| 28 | 0 | ElasticNet_20240413_3_1000_0.1 | ElasticNet | 3 | 1000 | 0.1 | mean_squared_error | 0.969283 | 0.908738 | 2.296879 | 0.002681 | 0.007561 |
| 29 | 0 | GAM_20240413_3_1000_0.1 | GAM | 3 | 1000 | 0.1 | mean_squared_error | 0.000303 | 0.000099 | 0.000234 | 0.000564 | 0.000141 |
| 30 | 0 | RandomForestRegressor_20240413_3_1000_0.5 | RandomForestRegressor | 3 | 1000 | 0.5 | mean_squared_error | 0.137355 | 0.190433 | 0.890642 | 0.166069 | 0.132726 |
| 31 | 0 | XGBRegressor_20240414_3_1000_0.5 | XGBRegressor | 3 | 1000 | 0.5 | mean_squared_error | 0.019530 | 0.031508 | 0.025388 | 0.021775 | 0.013518 |
| 32 | 0 | DecisionTreeRegressor_20240414_3_1000_0.5 | DecisionTreeRegressor | 3 | 1000 | 0.5 | mean_squared_error | 0.162907 | 0.347728 | 0.773309 | 0.223763 | 0.130480 |
| 33 | 0 | SVR_20240414_3_1000_0.5 | SVR | 3 | 1000 | 0.5 | mean_squared_error | 0.000558 | 0.002804 | 0.001020 | 0.000614 | 0.000832 |
| 34 | 0 | ElasticNet_20240414_3_1000_0.5 | ElasticNet | 3 | 1000 | 0.5 | mean_squared_error | 0.968399 | 0.908582 | 2.296770 | 0.002834 | 0.007601 |
| 35 | 0 | GAM_20240414_3_1000_0.5 | GAM | 3 | 1000 | 0.5 | mean_squared_error | 0.001016 | 0.000920 | 0.004985 | 0.000800 | 0.000165 |
| 36 | 0 | RandomForestRegressor_20240414_4_1000_0.1 | RandomForestRegressor | 4 | 1000 | 0.1 | mean_squared_error | 0.179592 | 0.193935 | 0.946746 | 0.133848 | 0.152169 |
| 37 | 0 | XGBRegressor_20240414_4_1000_0.1 | XGBRegressor | 4 | 1000 | 0.1 | mean_squared_error | 0.015688 | 0.024212 | 0.030230 | 0.015045 | 0.011537 |
| 38 | 0 | DecisionTreeRegressor_20240414_4_1000_0.1 | DecisionTreeRegressor | 4 | 1000 | 0.1 | mean_squared_error | 0.192033 | 0.162543 | 1.002283 | 0.193901 | 0.105449 |
| 39 | 0 | SVR_20240414_4_1000_0.1 | SVR | 4 | 1000 | 0.1 | mean_squared_error | 0.000056 | 0.000135 | 0.000334 | 0.000082 | 0.000161 |
| 40 | 0 | ElasticNet_20240414_4_1000_0.1 | ElasticNet | 4 | 1000 | 0.1 | mean_squared_error | 1.051079 | 1.036128 | 2.296727 | 0.001188 | 0.000910 |
| 41 | 0 | GAM_20240414_4_1000_0.1 | GAM | 4 | 1000 | 0.1 | mean_squared_error | 0.000215 | 0.000419 | 0.000269 | 0.000514 | 0.000174 |
| 42 | 0 | RandomForestRegressor_20240414_4_1000_0.5 | RandomForestRegressor | 4 | 1000 | 0.5 | mean_squared_error | 0.172444 | 0.198991 | 0.975957 | 0.109655 | 0.161776 |
| 43 | 0 | XGBRegressor_20240414_4_1000_0.5 | XGBRegressor | 4 | 1000 | 0.5 | mean_squared_error | 0.024328 | 0.031237 | 0.032734 | 0.018225 | 0.017903 |
| 44 | 0 | DecisionTreeRegressor_20240414_4_1000_0.5 | DecisionTreeRegressor | 4 | 1000 | 0.5 | mean_squared_error | 0.180456 | 0.170292 | 0.867263 | 0.193355 | 0.117926 |
| 45 | 0 | SVR_20240414_4_1000_0.5 | SVR | 4 | 1000 | 0.5 | mean_squared_error | 0.000793 | 0.002795 | 0.001837 | 0.002153 | 0.002939 |
| 46 | 0 | ElasticNet_20240414_4_1000_0.5 | ElasticNet | 4 | 1000 | 0.5 | mean_squared_error | 1.056593 | 1.050310 | 2.297129 | 0.001332 | 0.002119 |
| 47 | 0 | GAM_20240414_4_1000_0.5 | GAM | 4 | 1000 | 0.5 | mean_squared_error | 0.000784 | 0.002298 | 0.004507 | 0.000387 | 0.000618 |
| 48 | 0 | RandomForestRegressor_20240414_5_1000_0.1 | RandomForestRegressor | 5 | 1000 | 0.1 | mean_squared_error | 0.265733 | 0.138409 | 0.731703 | 0.204957 | 0.149158 |
| 49 | 0 | XGBRegressor_20240414_5_1000_0.1 | XGBRegressor | 5 | 1000 | 0.1 | mean_squared_error | 0.017781 | 0.012037 | 0.028900 | 0.021594 | 0.012021 |
| 50 | 0 | DecisionTreeRegressor_20240414_5_1000_0.1 | DecisionTreeRegressor | 5 | 1000 | 0.1 | mean_squared_error | 0.137495 | 0.268633 | 0.916016 | 0.227593 | 0.142377 |
| 51 | 0 | SVR_20240414_5_1000_0.1 | SVR | 5 | 1000 | 0.1 | mean_squared_error | 0.000176 | 0.000137 | 0.000285 | 0.000107 | 0.000255 |
| 52 | 0 | ElasticNet_20240414_5_1000_0.1 | ElasticNet | 5 | 1000 | 0.1 | mean_squared_error | 0.964777 | 0.999703 | 2.300356 | 0.000762 | 0.007345 |
| 53 | 0 | GAM_20240414_5_1000_0.1 | GAM | 5 | 1000 | 0.1 | mean_squared_error | 0.000326 | 0.000252 | 0.000587 | 0.000428 | 0.000141 |
| 54 | 0 | RandomForestRegressor_20240414_5_1000_0.5 | RandomForestRegressor | 5 | 1000 | 0.5 | mean_squared_error | 0.279758 | 0.131032 | 0.662782 | 0.195756 | 0.151572 |
| 55 | 0 | XGBRegressor_20240414_5_1000_0.5 | XGBRegressor | 5 | 1000 | 0.5 | mean_squared_error | 0.021744 | 0.014687 | 0.030154 | 0.026170 | 0.013582 |
| 56 | 0 | DecisionTreeRegressor_20240414_5_1000_0.5 | DecisionTreeRegressor | 5 | 1000 | 0.5 | mean_squared_error | 0.151106 | 0.169537 | 0.756001 | 0.247949 | 0.111059 |
| 57 | 0 | SVR_20240414_5_1000_0.5 | SVR | 5 | 1000 | 0.5 | mean_squared_error | 0.000602 | 0.001949 | 0.004220 | 0.001232 | 0.000363 |
| 58 | 0 | ElasticNet_20240414_5_1000_0.5 | ElasticNet | 5 | 1000 | 0.5 | mean_squared_error | 0.964049 | 0.999738 | 2.302917 | 0.000776 | 0.006847 |
| 59 | 0 | GAM_20240414_5_1000_0.5 | GAM | 5 | 1000 | 0.5 | mean_squared_error | 0.001349 | 0.001290 | 0.010604 | 0.001113 | 0.000435 |
In [5]:
%matplotlib inline
boxplot_feature_effect_results(features=["x_1", "x_2", "x_3", "x_4", "x_5"], df=df, effect_type="PDP");
PDP example visualizations¶
In [6]:
# Caption naming the simulation run that the example plots below refer to.
# `md`, `sim_no`, `n_train` and `noise_sd` are defined outside this cell.
md(
    f"(simulation no. {sim_no} with n_train={n_train} and noise_sd={noise_sd})"
)
Out[6]:
(simulation no. 1 with n_train=1000 and noise_sd=0.1)
In [8]:
%matplotlib inline
plot_effect_comparison(rf, groundtruth, X_train, effect="PDP", features=['x_1', "x_2", "x_3", "x_4", "x_5"], config=config);
In [9]:
%matplotlib inline
plot_effect_comparison(xgb, groundtruth, X_train, effect="PDP", features=['x_1', "x_2", "x_3", "x_4", "x_5"], config=config);
In [10]:
%matplotlib inline
plot_effect_comparison(tree, groundtruth, X_train, effect="PDP", features=['x_1', "x_2", "x_3", "x_4", "x_5"], config=config);
In [11]:
%matplotlib inline
plot_effect_comparison(svm, groundtruth, X_train, effect="PDP", features=['x_1', "x_2", "x_3", "x_4", "x_5"], config=config);
In [12]:
%matplotlib inline
plot_effect_comparison(elasticnet, groundtruth, X_train, effect="PDP", features=['x_1', "x_2", "x_3", "x_4", "x_5"], config=config);
In [13]:
%matplotlib inline
plot_effect_comparison(gam, groundtruth, X_train, effect="PDP", features=['x_1', "x_2", "x_3", "x_4", "x_5"], config=config);
ALE¶
Error of Model-ALE compared to groundtruth-ALE¶
In [14]:
# Look up where the feature-effect results database lives.
# NOTE(review): `config` is defined outside this cell; this looks like a
# configparser-style (section, option) lookup — confirm.
effects_results_storage = config.get("storage", "effects_results")

# Read the whole "ale_results" table into a DataFrame. This rebinds `df`,
# so any earlier frame held under that name is no longer reachable through it.
df = pd.read_sql_table(
    table_name="ale_results",
    con=f"sqlite:///..{effects_results_storage}",
)

# Bare last expression: render the frame as the cell output.
df
Out[14]:
| | index | model_id | model | simulation | n_train | noise_sd | metric | x_1 | x_2 | x_3 | x_4 | x_5 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | RandomForestRegressor_20240413_1_1000_0.1 | RandomForestRegressor | 1 | 1000 | 0.1 | mean_squared_error | 0.102209 | 0.078024 | 0.504765 | 0.045459 | 0.022509 |
| 1 | 0 | XGBRegressor_20240413_1_1000_0.1 | XGBRegressor | 1 | 1000 | 0.1 | mean_squared_error | 0.088313 | 0.032658 | 0.044790 | 0.033427 | 0.033133 |
| 2 | 0 | DecisionTreeRegressor_20240413_1_1000_0.1 | DecisionTreeRegressor | 1 | 1000 | 0.1 | mean_squared_error | 0.450111 | 0.341412 | 0.613983 | 2.657984 | 0.412017 |
| 3 | 0 | SVR_20240413_1_1000_0.1 | SVR | 1 | 1000 | 0.1 | mean_squared_error | 0.000037 | 0.000040 | 0.000218 | 0.000056 | 0.000214 |
| 4 | 0 | ElasticNet_20240413_1_1000_0.1 | ElasticNet | 1 | 1000 | 0.1 | mean_squared_error | 0.856908 | 1.085617 | 2.223995 | 0.000952 | 0.001000 |
| 5 | 0 | GAM_20240413_1_1000_0.1 | GAM | 1 | 1000 | 0.1 | mean_squared_error | 0.000334 | 0.000265 | 0.000089 | 0.000320 | 0.000190 |
| 6 | 0 | RandomForestRegressor_20240413_1_1000_0.5 | RandomForestRegressor | 1 | 1000 | 0.5 | mean_squared_error | 0.107340 | 0.078632 | 0.481248 | 0.041882 | 0.039674 |
| 7 | 0 | XGBRegressor_20240413_1_1000_0.5 | XGBRegressor | 1 | 1000 | 0.5 | mean_squared_error | 0.094882 | 0.087883 | 0.099508 | 0.028062 | 0.082043 |
| 8 | 0 | DecisionTreeRegressor_20240413_1_1000_0.5 | DecisionTreeRegressor | 1 | 1000 | 0.5 | mean_squared_error | 0.332879 | 0.326019 | 0.512040 | 0.473676 | 0.305222 |
| 9 | 0 | SVR_20240413_1_1000_0.5 | SVR | 1 | 1000 | 0.5 | mean_squared_error | 0.000317 | 0.001205 | 0.001456 | 0.000398 | 0.001531 |
| 10 | 0 | ElasticNet_20240413_1_1000_0.5 | ElasticNet | 1 | 1000 | 0.5 | mean_squared_error | 0.856576 | 1.088063 | 2.224006 | 0.000951 | 0.000866 |
| 11 | 0 | GAM_20240413_1_1000_0.5 | GAM | 1 | 1000 | 0.5 | mean_squared_error | 0.000803 | 0.001239 | 0.001782 | 0.000254 | 0.000386 |
| 12 | 0 | RandomForestRegressor_20240413_2_1000_0.1 | RandomForestRegressor | 2 | 1000 | 0.1 | mean_squared_error | 0.108499 | 0.088505 | 0.553368 | 0.113833 | 0.017661 |
| 13 | 0 | XGBRegressor_20240413_2_1000_0.1 | XGBRegressor | 2 | 1000 | 0.1 | mean_squared_error | 0.069223 | 0.066510 | 0.016483 | 0.031998 | 0.052082 |
| 14 | 0 | DecisionTreeRegressor_20240413_2_1000_0.1 | DecisionTreeRegressor | 2 | 1000 | 0.1 | mean_squared_error | 0.758521 | 0.379145 | 0.758179 | 1.960838 | 0.104751 |
| 15 | 0 | SVR_20240413_2_1000_0.1 | SVR | 2 | 1000 | 0.1 | mean_squared_error | 0.000037 | 0.000046 | 0.000136 | 0.000176 | 0.000047 |
| 16 | 0 | ElasticNet_20240413_2_1000_0.1 | ElasticNet | 2 | 1000 | 0.1 | mean_squared_error | 0.944539 | 0.992477 | 2.259133 | 0.000714 | 0.000681 |
| 17 | 0 | GAM_20240413_2_1000_0.1 | GAM | 2 | 1000 | 0.1 | mean_squared_error | 0.000256 | 0.000292 | 0.000179 | 0.000849 | 0.000138 |
| 18 | 0 | RandomForestRegressor_20240413_2_1000_0.5 | RandomForestRegressor | 2 | 1000 | 0.5 | mean_squared_error | 0.087106 | 0.091529 | 0.559728 | 0.105091 | 0.022528 |
| 19 | 0 | XGBRegressor_20240413_2_1000_0.5 | XGBRegressor | 2 | 1000 | 0.5 | mean_squared_error | 0.052993 | 0.062248 | 0.022821 | 0.032069 | 0.044360 |
| 20 | 0 | DecisionTreeRegressor_20240413_2_1000_0.5 | DecisionTreeRegressor | 2 | 1000 | 0.5 | mean_squared_error | 0.657025 | 2.455691 | 0.773211 | 0.891441 | 0.239885 |
| 21 | 0 | SVR_20240413_2_1000_0.5 | SVR | 2 | 1000 | 0.5 | mean_squared_error | 0.001267 | 0.001508 | 0.000990 | 0.001604 | 0.001187 |
| 22 | 0 | ElasticNet_20240413_2_1000_0.5 | ElasticNet | 2 | 1000 | 0.5 | mean_squared_error | 0.943744 | 0.991152 | 2.261525 | 0.003615 | 0.000959 |
| 23 | 0 | GAM_20240413_2_1000_0.5 | GAM | 2 | 1000 | 0.5 | mean_squared_error | 0.000709 | 0.001130 | 0.002092 | 0.003852 | 0.000032 |
| 24 | 0 | RandomForestRegressor_20240413_3_1000_0.1 | RandomForestRegressor | 3 | 1000 | 0.1 | mean_squared_error | 0.084576 | 0.174632 | 0.687282 | 0.080984 | 0.044223 |
| 25 | 0 | XGBRegressor_20240413_3_1000_0.1 | XGBRegressor | 3 | 1000 | 0.1 | mean_squared_error | 0.051716 | 0.009275 | 0.013223 | 0.032149 | 0.023333 |
| 26 | 0 | DecisionTreeRegressor_20240413_3_1000_0.1 | DecisionTreeRegressor | 3 | 1000 | 0.1 | mean_squared_error | 0.456843 | 0.483704 | 0.601405 | 0.296938 | 0.217799 |
| 27 | 0 | SVR_20240413_3_1000_0.1 | SVR | 3 | 1000 | 0.1 | mean_squared_error | 0.000176 | 0.000246 | 0.000082 | 0.000033 | 0.000023 |
| 28 | 0 | ElasticNet_20240413_3_1000_0.1 | ElasticNet | 3 | 1000 | 0.1 | mean_squared_error | 1.028461 | 0.980727 | 2.227729 | 0.002826 | 0.007909 |
| 29 | 0 | GAM_20240413_3_1000_0.1 | GAM | 3 | 1000 | 0.1 | mean_squared_error | 0.000408 | 0.000215 | 0.000224 | 0.000595 | 0.000148 |
| 30 | 0 | RandomForestRegressor_20240413_3_1000_0.5 | RandomForestRegressor | 3 | 1000 | 0.5 | mean_squared_error | 0.097168 | 0.163768 | 0.701964 | 0.081795 | 0.051585 |
| 31 | 0 | XGBRegressor_20240414_3_1000_0.5 | XGBRegressor | 3 | 1000 | 0.5 | mean_squared_error | 0.027236 | 0.026640 | 0.015414 | 0.031269 | 0.022480 |
| 32 | 0 | DecisionTreeRegressor_20240414_3_1000_0.5 | DecisionTreeRegressor | 3 | 1000 | 0.5 | mean_squared_error | 0.301233 | 0.257206 | 1.141532 | 0.448084 | 0.425581 |
| 33 | 0 | SVR_20240414_3_1000_0.5 | SVR | 3 | 1000 | 0.5 | mean_squared_error | 0.000716 | 0.003037 | 0.001180 | 0.000910 | 0.000937 |
| 34 | 0 | ElasticNet_20240414_3_1000_0.5 | ElasticNet | 3 | 1000 | 0.5 | mean_squared_error | 1.022676 | 0.980959 | 2.227729 | 0.002991 | 0.007931 |
| 35 | 0 | GAM_20240414_3_1000_0.5 | GAM | 3 | 1000 | 0.5 | mean_squared_error | 0.000905 | 0.001394 | 0.004704 | 0.000845 | 0.000172 |
| 36 | 0 | RandomForestRegressor_20240414_4_1000_0.1 | RandomForestRegressor | 4 | 1000 | 0.1 | mean_squared_error | 0.067455 | 0.131734 | 0.796294 | 0.037615 | 0.065405 |
| 37 | 0 | XGBRegressor_20240414_4_1000_0.1 | XGBRegressor | 4 | 1000 | 0.1 | mean_squared_error | 0.058053 | 0.071941 | 0.015962 | 0.037490 | 0.089937 |
| 38 | 0 | DecisionTreeRegressor_20240414_4_1000_0.1 | DecisionTreeRegressor | 4 | 1000 | 0.1 | mean_squared_error | 0.251189 | 0.250814 | 0.793227 | 0.414049 | 1.172958 |
| 39 | 0 | SVR_20240414_4_1000_0.1 | SVR | 4 | 1000 | 0.1 | mean_squared_error | 0.000075 | 0.000123 | 0.000288 | 0.000091 | 0.000137 |
| 40 | 0 | ElasticNet_20240414_4_1000_0.1 | ElasticNet | 4 | 1000 | 0.1 | mean_squared_error | 1.225967 | 0.968002 | 2.184606 | 0.001146 | 0.000868 |
| 41 | 0 | GAM_20240414_4_1000_0.1 | GAM | 4 | 1000 | 0.1 | mean_squared_error | 0.000220 | 0.000426 | 0.000248 | 0.000495 | 0.000164 |
| 42 | 0 | RandomForestRegressor_20240414_4_1000_0.5 | RandomForestRegressor | 4 | 1000 | 0.5 | mean_squared_error | 0.060786 | 0.146011 | 0.788864 | 0.036007 | 0.079909 |
| 43 | 0 | XGBRegressor_20240414_4_1000_0.5 | XGBRegressor | 4 | 1000 | 0.5 | mean_squared_error | 0.060250 | 0.068185 | 0.041078 | 0.048566 | 0.063810 |
| 44 | 0 | DecisionTreeRegressor_20240414_4_1000_0.5 | DecisionTreeRegressor | 4 | 1000 | 0.5 | mean_squared_error | 0.795661 | 0.524065 | 0.511875 | 0.671951 | 0.346964 |
| 45 | 0 | SVR_20240414_4_1000_0.5 | SVR | 4 | 1000 | 0.5 | mean_squared_error | 0.000692 | 0.002709 | 0.001586 | 0.002086 | 0.002741 |
| 46 | 0 | ElasticNet_20240414_4_1000_0.5 | ElasticNet | 4 | 1000 | 0.5 | mean_squared_error | 1.230699 | 0.978949 | 2.184610 | 0.001238 | 0.001969 |
| 47 | 0 | GAM_20240414_4_1000_0.5 | GAM | 4 | 1000 | 0.5 | mean_squared_error | 0.000418 | 0.002201 | 0.004158 | 0.000320 | 0.000533 |
| 48 | 0 | RandomForestRegressor_20240414_5_1000_0.1 | RandomForestRegressor | 5 | 1000 | 0.1 | mean_squared_error | 0.082736 | 0.110605 | 0.610456 | 0.152503 | 0.071705 |
| 49 | 0 | XGBRegressor_20240414_5_1000_0.1 | XGBRegressor | 5 | 1000 | 0.1 | mean_squared_error | 0.137153 | 0.118048 | 0.031283 | 0.070292 | 0.065766 |
| 50 | 0 | DecisionTreeRegressor_20240414_5_1000_0.1 | DecisionTreeRegressor | 5 | 1000 | 0.1 | mean_squared_error | 0.452512 | 0.360682 | 1.111724 | 1.219658 | 0.374757 |
| 51 | 0 | SVR_20240414_5_1000_0.1 | SVR | 5 | 1000 | 0.1 | mean_squared_error | 0.000037 | 0.000121 | 0.000236 | 0.000031 | 0.000208 |
| 52 | 0 | ElasticNet_20240414_5_1000_0.1 | ElasticNet | 5 | 1000 | 0.1 | mean_squared_error | 0.961997 | 1.045239 | 2.380695 | 0.000765 | 0.007340 |
| 53 | 0 | GAM_20240414_5_1000_0.1 | GAM | 5 | 1000 | 0.1 | mean_squared_error | 0.000359 | 0.000281 | 0.000510 | 0.000422 | 0.000125 |
| 54 | 0 | RandomForestRegressor_20240414_5_1000_0.5 | RandomForestRegressor | 5 | 1000 | 0.5 | mean_squared_error | 0.086741 | 0.107020 | 0.513056 | 0.141217 | 0.042970 |
| 55 | 0 | XGBRegressor_20240414_5_1000_0.5 | XGBRegressor | 5 | 1000 | 0.5 | mean_squared_error | 0.055380 | 0.080611 | 0.047591 | 0.095212 | 0.016603 |
| 56 | 0 | DecisionTreeRegressor_20240414_5_1000_0.5 | DecisionTreeRegressor | 5 | 1000 | 0.5 | mean_squared_error | 0.598115 | 0.313542 | 0.728063 | 4.416162 | 0.187126 |
| 57 | 0 | SVR_20240414_5_1000_0.5 | SVR | 5 | 1000 | 0.5 | mean_squared_error | 0.000359 | 0.002813 | 0.004640 | 0.001000 | 0.000200 |
| 58 | 0 | ElasticNet_20240414_5_1000_0.5 | ElasticNet | 5 | 1000 | 0.5 | mean_squared_error | 0.960468 | 1.047228 | 2.386402 | 0.000374 | 0.006440 |
| 59 | 0 | GAM_20240414_5_1000_0.5 | GAM | 5 | 1000 | 0.5 | mean_squared_error | 0.001030 | 0.001030 | 0.009240 | 0.000720 | 0.000024 |
In [15]:
%matplotlib inline
boxplot_feature_effect_results(features=["x_1", "x_2", "x_3", "x_4", "x_5"], df=df, effect_type="ALE");
ALE example visualizations¶
In [16]:
# Caption naming the simulation run that the example plots below refer to.
# `md`, `sim_no`, `n_train` and `noise_sd` are defined outside this cell.
md(
    f"(simulation no. {sim_no} with n_train={n_train} and noise_sd={noise_sd})"
)
Out[16]:
(simulation no. 1 with n_train=1000 and noise_sd=0.1)
In [17]:
%matplotlib inline
plot_effect_comparison(rf, groundtruth, X_train, effect="ALE", features=['x_1', "x_2", "x_3", "x_4", "x_5"], config=config);
In [18]:
%matplotlib inline
plot_effect_comparison(xgb, groundtruth, X_train, effect="ALE", features=['x_1', "x_2", "x_3", "x_4", "x_5"], config=config);
In [19]:
%matplotlib inline
plot_effect_comparison(tree, groundtruth, X_train, effect="ALE", features=['x_1', "x_2", "x_3", "x_4", "x_5"], config=config);
In [20]:
%matplotlib inline
plot_effect_comparison(svm, groundtruth, X_train, effect="ALE", features=['x_1', "x_2", "x_3", "x_4", "x_5"], config=config);
In [21]:
%matplotlib inline
plot_effect_comparison(elasticnet, groundtruth, X_train, effect="ALE", features=['x_1', "x_2", "x_3", "x_4", "x_5"], config=config);
In [22]:
%matplotlib inline
plot_effect_comparison(gam, groundtruth, X_train, effect="ALE", features=['x_1', "x_2", "x_3", "x_4", "x_5"], config=config);